* Session setup.
* Close any log left open by an earlier run; capture suppresses the error
* raised when no log is open.
capture log close

* Interactive behaviour: never stop at -more- prompts, and let any pause
* commands actually halt execution.
set more off
pause on

* Start from an empty dataset, then size memory and the variable limit
* (both settings require that no data be in memory).
clear
set memory 700m
set maxvar 10000

* 1) Prepare the baseline data
use "ImmunizationBaseline.dta", clear

* The analysis variables must be numeric: convert the location id and the
* d0 / d2 question blocks in place.
destring locid d0* d2*, replace

* Recode the survey sentinel values -999 and -777 to system missing
* (presumably non-response codes; confirm against the codebook).
foreach q of varlist d2* d0* {
	replace `q' = . if inlist(`q', -999, -777)
}

/* Age in months.
   d0_2_1 is the reported age and d0_2_2 the code for the unit it was
   reported in. The conversion factors imply 1 = days, 2 = weeks,
   3 = months, 4 = years (confirm against the codebook). Any other or
   missing unit code leaves agemon missing. */
gen agemon = cond(d0_2_2==3, d0_2_1,          ///
             cond(d0_2_2==1, d0_2_1/30,       ///
             cond(d0_2_2==2, d0_2_1*(7/30),   ///
             cond(d0_2_2==4, d0_2_1*(12), .))))


/* Baseline full-immunization indicators.
   Each variable is 1 when d2_5 (number of shots) is 5 or more, 0 when it
   is below 5, and missing when the child is outside the age window or the
   shot count is missing.
   missing() is used rather than a "!=." comparison: extended missing
   values (.a to .z) are different from "." yet still compare greater than
   every number, so "d2_5>=5 & d2_5!=." would code them as 1.
   inrange() is false for any missing agemon. */

/* complete immunization for 12-60 mon */
gen complete = (d2_5 >= 5) if inrange(agemon, 12, 60) & !missing(d2_5)

/* complete immunization for 12-24 mon */
gen complete1to2 = (d2_5 >= 5) if inrange(agemon, 12, 24) & !missing(d2_5)

/* complete immunization for 12-36 mon */
gen complete1to3 = (d2_5 >= 5) if inrange(agemon, 12, 36) & !missing(d2_5)

/* Tag every baseline variable with the suffix "__1" so that it cannot
   collide with the endline variable of the same name after merging. */
foreach oldname of varlist * {
	rename `oldname' `oldname'__1
}
* ID is the merge key, so it gets its unsuffixed name back.
rename ID__1 ID

* Records whose identifier is the literal string "NA" cannot be matched.
drop if ID == "NA"
sort ID

/* Keep the prepared baseline in a temporary file for the merge below */
tempfile base
save "`base'"


*2) Endline
* Load the endline survey; it is the master dataset for the merge below.
use "ImmunizationEndline.dta", clear

*merge baseline immunization data
* One-to-one match on ID against the temporary baseline file. Baseline
* variables carry the "__1" suffix, so they sit alongside the endline
* variables of the same name.
* NOTE(review): no keep() option or assert on _merge is used, so rows found
* in only one of the two files stay in the data, with the other file's
* variables missing. Confirm this is intended before _merge is discarded.
merge 1:1 ID using `base'
drop _merge

*** Based on immunization camp dates, as well as survey dates
*** we can look at children who are 1-3 years of age

* Endline coverage indicators built from d2_5 (number of shots) and agemon.
* Each is 1 at or above the shot threshold, 0 below it, and missing when the
* child is outside the age window or the shot count is missing.
* missing() replaces the "!=." comparison because extended missing values
* (.a to .z) differ from "." yet compare greater than every number, so they
* would otherwise be coded as 1. inrange() is false for a missing agemon.

*5 or more shots, age 1-3
gen complete1to3 = (d2_5 >= 5) if inrange(agemon, 12, 36) & !missing(d2_5)

*4 or more shots, age 1-3
gen completerelaxed1to3 = (d2_5 >= 4) if inrange(agemon, 12, 36) & !missing(d2_5)

*4 or more shots, age 1-2
gen completerelaxed1to2 = (d2_5 >= 4) if inrange(agemon, 12, 24) & !missing(d2_5)

* "At least one shot" indicators and age-restricted shot counts.
* The indicators are 1 when d2_5 > 0, 0 when d2_5 == 0, and missing when
* d2_5 is negative or missing or the child is outside the age window.
* missing() replaces the "!=." comparisons: extended missing values
* (.a to .z) differ from "." yet compare greater than every number, so an
* extended-missing d2_5 or agemon would otherwise pass the ">" tests.

*at least 1 shot, older than 1 year
gen atleast1shot = (d2_5 > 0) if d2_5 >= 0 & !missing(d2_5) & agemon >= 12 & !missing(agemon)

*at least 1 shot, age 1-3
gen atleast1shot_1to3 = (d2_5 > 0) if d2_5 >= 0 & !missing(d2_5) & inrange(agemon, 12, 36)

*number of shots, non-missing only for those age 1-3
gen d2_5_1to3 = d2_5 if inrange(agemon, 12, 36)

*at least 1 shot, non-missing only for those age 1-2
gen atleast1shot_1to2 = (d2_5 > 0) if d2_5 >= 0 & !missing(d2_5) & inrange(agemon, 12, 24)

*number of shots, non-missing only for those age 1-2
gen d2_5_1to2 = d2_5 if inrange(agemon, 12, 24)

***Indicators for which shots kids were likely to have received based on normal shot order
* The k-th vaccine in the order BCG, DPT1, DPT2, DPT3, measles is coded 1
* when at least k shots are recorded in d2_5 and 0 otherwise; a missing
* shot count is coded 0.
* missing() replaces the "!=." and "==." comparisons: extended missing
* values (.a to .z) differ from "." yet compare greater than every number,
* so they would be set to 1 by the first replace and never reset to 0.

* BCG is the first shot. Its zero case is tested with equality, so a
* negative d2_5 leaves BCG missing.
gen BCG = .
replace BCG = 1 if d2_5 >= 1 & !missing(d2_5)
replace BCG = 0 if d2_5 == 0 | missing(d2_5)

* Remaining vaccines: thresholds 2 through 5 in schedule order.
local threshold = 1
foreach shot in DPT1 DPT2 DPT3 measles {
	local ++threshold
	gen `shot' = .
	replace `shot' = 1 if d2_5 >= `threshold' & !missing(d2_5)
	replace `shot' = 0 if d2_5 <= `threshold' - 1 | missing(d2_5)
}


** Dependent variables restricted to the baseline cohort: each _bc variable
** copies its endline source only for children whose baseline age
** (agemon__1) was between 0 and 18 months, and is missing otherwise
** (including when the baseline age is missing).

foreach outcome in d2_5 complete1to3 atleast1shot completerelaxed1to3 {
	gen `outcome'_bc = `outcome' if inrange(agemon__1, 0, 18)
}

** Control variables: the baseline immunization level of each hamlet,
** computed as the within-locid mean of the baseline complete indicator and
** of the baseline shot count (egen mean() ignores missing values).
** NOTE(review): the baseline location id was renamed locid__1, so locid
** here comes from the endline file; rows present only in baseline would
** have a missing locid and be grouped together. Confirm this is intended.

bysort locid: egen completehambase = mean(complete__1)
by locid: egen shotshambase = mean(d2_5__1)

*various potential treatment statuses
* treat1, treat2, treat3 are 0/1 dummies equal to 1 when the matching arm
* variable equals 1; a missing arm variable yields 0, not missing.
local arm = 0
foreach status in NonTreat TreatNonIncentive TreatIncentive {
	local ++arm
	gen treat`arm' = (`status' == 1)
}

*write the merged data with all generated variables to the current directory
save "imm_mergedwHHandBaseline.dta", replace

